In [26]:
import pandas as pd
import numpy as np
In [ ]:
# import data 
In [27]:
dt = pd.read_csv("data.csv", sep=";")
In [28]:
dt.head()
Out[28]:
mois quantites_reelles
0 2020-02 1352
1 2020-03 2016
2 2020-04 1179
3 2020-05 601
4 2018-01 1087

we have data redundancies (mois)

In [29]:
dt.shape
Out[29]:
(414, 2)
In [30]:
dt.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   mois               414 non-null    object
 1   quantites_reelles  414 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.6+ KB
In [31]:
dt.mean()
Out[31]:
quantites_reelles    2833.379227
dtype: float64
In [110]:
# import covide data 
In [32]:
covid19_tn = pd.read_csv("covid19_tn.csv", sep=";")
In [33]:
covid19_tn["date"][1]
Out[33]:
'09/03/2020'

we want change the format to yyyy-mm like our data.csv

In [34]:
for i in range(len(covid19_tn)):
    covid19_tn["date"][i]=covid19_tn["date"][i][3:]
    covid19_tn["date"][i]=covid19_tn["date"][i].replace("/",'-')
    covid19_tn["date"][i]=covid19_tn["date"][i][3:]+"-"+covid19_tn["date"][i][:2]
    
C:\Users\raed\Anaconda3\envs\ML\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\raed\Anaconda3\envs\ML\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\raed\Anaconda3\envs\ML\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [35]:
#covid19_tn.to_csv("covid19_tn_For_model.csv",index=False)
In [36]:
covid19_tn.head()
Out[36]:
date case
0 2020-03 1.0
1 2020-03 1.0
2 2020-03 2.0
3 2020-03 5.0
4 2020-03 7.0
In [37]:
covid19_tn1=covid19_tn.groupby('date').max()
covid19_tn1.reset_index(inplace=True)
covid19_tn1 = covid19_tn1.rename(columns = {'index':'date'})
covid19_tn1=covid19_tn1.rename(columns = {'date': 'mois'}, inplace = False)
covid19_tn1
Out[37]:
mois case
0 2020-03 312.0
1 2020-04 980.0
2 2020-05 1076.0
3 2020-06 1172.0
4 2020-07 1514.0
5 2020-08 3685.0
6 2020-09 17405.0
7 2020-10 59813.0
8 2020-11 92475.0
In [38]:
covid19_tn1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   mois    9 non-null      object 
 1   case    9 non-null      float64
dtypes: float64(1), object(1)
memory usage: 272.0+ bytes
In [39]:
covid19_tn1['mois']=covid19_tn1['mois'].apply(str)
In [40]:
import seaborn as sns 
import matplotlib.pyplot as plt 
#g1 = plt.subplot(212)
g1 = sns.pointplot(x="mois", y ="case",data=covid19_tn1)
g1.set_xlabel("mois", fontsize=12)
g1.set_ylabel("case", fontsize=12)
g1.set_title("covide-16 tunisie", fontsize=20)

plt.subplots_adjust(wspace = 0.5, hspace = 0.5,top = 1)

plt.show()
In [41]:
import plotly.graph_objs as go 
In [42]:
import plotly.graph_objs as go 
import plotly.tools as tls # It's useful to we get some tools of plotly
#covid19_tn1['mois'] = list(covid19_tn1.index)
fig = go.Figure()
fig.add_trace(go.Scatter(x=covid19_tn1['mois'], y=covid19_tn1['case'],
                    mode='lines+markers',name='Total Cases'))
fig.update_layout(title_text='Confirmed cases in Tunisia each MONTH',plot_bgcolor='rgb(250, 242, 242)')
fig.show()
Mar 2020Apr 2020May 2020Jun 2020Jul 2020Aug 2020Sep 2020Oct 2020Nov 2020020k40k60k80k
Confirmed cases in Tunisia each MONTH

we have to sum the "quantites_reelles" group by mois

In [43]:
df=dt.groupby('mois').sum()
In [44]:
df.reset_index(inplace=True)
df = df.rename(columns = {'index':'mois'})
In [45]:
df
Out[45]:
mois quantites_reelles
0 2018-01 38731
1 2018-02 35837
2 2018-03 41470
3 2018-04 42260
4 2018-05 44181
5 2018-06 37538
6 2018-07 35772
7 2018-08 39691
8 2018-09 64329
9 2018-10 56186
10 2018-11 36473
11 2018-12 28906
12 2019-01 62317
13 2019-02 40296
14 2019-03 44123
15 2019-04 40335
16 2019-05 42041
17 2019-06 27989
18 2019-07 26646
19 2019-08 33304
20 2019-09 52272
21 2019-10 35119
22 2019-11 21722
23 2019-12 23927
24 2020-01 48463
25 2020-02 60080
26 2020-03 79955
27 2020-04 31613
28 2020-05 1443

merging the two data

In [46]:
df_finale=df.merge(covid19_tn1, on = ['mois'],how = 'outer')
In [47]:
df_finale
Out[47]:
mois quantites_reelles case
0 2018-01 38731.0 NaN
1 2018-02 35837.0 NaN
2 2018-03 41470.0 NaN
3 2018-04 42260.0 NaN
4 2018-05 44181.0 NaN
5 2018-06 37538.0 NaN
6 2018-07 35772.0 NaN
7 2018-08 39691.0 NaN
8 2018-09 64329.0 NaN
9 2018-10 56186.0 NaN
10 2018-11 36473.0 NaN
11 2018-12 28906.0 NaN
12 2019-01 62317.0 NaN
13 2019-02 40296.0 NaN
14 2019-03 44123.0 NaN
15 2019-04 40335.0 NaN
16 2019-05 42041.0 NaN
17 2019-06 27989.0 NaN
18 2019-07 26646.0 NaN
19 2019-08 33304.0 NaN
20 2019-09 52272.0 NaN
21 2019-10 35119.0 NaN
22 2019-11 21722.0 NaN
23 2019-12 23927.0 NaN
24 2020-01 48463.0 NaN
25 2020-02 60080.0 NaN
26 2020-03 79955.0 312.0
27 2020-04 31613.0 980.0
28 2020-05 1443.0 1076.0
29 2020-06 NaN 1172.0
30 2020-07 NaN 1514.0
31 2020-08 NaN 3685.0
32 2020-09 NaN 17405.0
33 2020-10 NaN 59813.0
34 2020-11 NaN 92475.0
In [48]:
df_finale.to_csv("df_finale.csv",index=False)

Prediction using Machine Learning Models¶

In [49]:
df.merge(covid19_tn1, on = ['mois'])
Out[49]:
mois quantites_reelles case
0 2020-03 79955 312.0
1 2020-04 31613 980.0
2 2020-05 1443 1076.0

Using machine learning techniques and with this dataset it's impossible to make a prediction here we can use GANs algorithm to generate more data ,but let's imagine that we have a big dataset and let's make some work

I don't want to add random values in quantity because they will augment the loss then i choose to ignore the column because i don't have A real data

We will use covid19_tn to make a predictions

In [56]:
covid19_tn_model = pd.read_csv("covid19_tn.csv", sep=";")
In [51]:
covid19_tn_model
Out[51]:
date case
0 03/03/2020 1.0
1 09/03/2020 1.0
2 10/03/2020 2.0
3 11/03/2020 5.0
4 12/03/2020 7.0
... ... ...
260 23/11/2020 88711.0
261 24/11/2020 89195.0
262 25/11/2020 90213.0
263 26/11/2020 91307.0
264 27/11/2020 92475.0

265 rows × 2 columns

In [5]:
import plotly.express as px
fig=px.bar(x=covid19_tn_model["date"],y=covid19_tn_model["case"])
fig.update_layout(title="Distribution of Number of  Cases",
                  xaxis_title="Date",yaxis_title="Number of Cases")
fig.show()
03/03/202013/03/202018/03/202023/03/202028/03/202002/04/202007/04/202012/04/202017/04/202022/04/202027/04/202002/05/202007/05/202012/05/202017/05/202022/05/202027/05/202001/06/202006/06/202011/06/202016/06/202021/06/202026/06/202001/07/202006/07/202011/07/202016/07/202021/07/202026/07/202031/07/202005/08/202010/08/202015/08/202020/08/202025/08/202030/08/202004/09/202009/09/202014/09/202019/09/202024/09/202029/09/202004/10/202009/10/202014/10/202019/10/202024/10/202029/10/202003/11/202008/11/202013/11/202018/11/202023/11/2020020k40k60k80k
Distribution of Number of CasesDateNumber of Cases
In [8]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=covid19_tn_model["date"],y=covid19_tn_model["case"],
                    mode='lines+markers',
                    name='Confirmed Cases'))

fig.update_layout(title="Number cases",
                 xaxis_title="Date",yaxis_title="Number of Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
03/03/202014/03/202020/03/202026/03/202001/04/202007/04/202013/04/202019/04/202025/04/202001/05/202007/05/202013/05/202019/05/202025/05/202031/05/202006/06/202012/06/202018/06/202024/06/202030/06/202006/07/202012/07/202018/07/202024/07/202030/07/202005/08/202011/08/202017/08/202023/08/202029/08/202004/09/202010/09/202016/09/202022/09/202028/09/202004/10/202010/10/202016/10/202022/10/202028/10/202003/11/202009/11/202015/11/202021/11/202027/11/2020020k40k60k80k
casesDateNumber of Cases
In [ ]:
 

Linear Regression Model for Confirm Cases Prediction

In [9]:
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.metrics import mean_squared_error,r2_score
In [10]:
train_ml=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid_ml=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
model_scores=[]
In [11]:
lin_reg=LinearRegression(normalize=True)
In [20]:
lin_reg.fit(np.array(train_ml.index).reshape(-1,1),np.array(train_ml["case"]).reshape(-1,1))
Out[20]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)
In [22]:
prediction_valid_linreg=lin_reg.predict(np.array(valid_ml.index).reshape(-1,1))
In [23]:
model_scores.append(np.sqrt(mean_squared_error(valid_ml["case"],prediction_valid_linreg)))
print("Root Mean Square Error for Linear Regression: ",np.sqrt(mean_squared_error(valid_ml["case"],prediction_valid_linreg)))
Root Mean Square Error for Linear Regression:  51886.249599323215
In [57]:
plt.figure(figsize=(11,6))
prediction_linreg=lin_reg.predict(np.array(covid19_tn_model.index).reshape(-1,1))
linreg_output=[]
for i in range(prediction_linreg.shape[0]):
    linreg_output.append(prediction_linreg[i][0])

fig=go.Figure()
fig.add_trace(go.Scatter(x=covid19_tn_model["date"],y=covid19_tn_model["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=covid19_tn_model["date"], y=linreg_output,
                    mode='lines',name="Linear Regression Best Fit Line",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Linear Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
03/03/202014/03/202020/03/202026/03/202001/04/202007/04/202013/04/202019/04/202025/04/202001/05/202007/05/202013/05/202019/05/202025/05/202031/05/202006/06/202012/06/202018/06/202024/06/202030/06/202006/07/202012/07/202018/07/202024/07/202030/07/202005/08/202011/08/202017/08/202023/08/202029/08/202004/09/202010/09/202016/09/202022/09/202028/09/202004/10/202010/10/202016/10/202022/10/202028/10/202003/11/202009/11/202015/11/202021/11/202027/11/2020020k40k60k80k
Train Data for Confirmed CasesLinear Regression Best Fit LineConfirmed Cases Linear Regression PredictionDateConfirmed Cases
<Figure size 792x432 with 0 Axes>

The Linear Regression Model is absolutely falling aprat. As it is clearly visible that the trend of Confirmed Cases in absolutely not Linear.

Polynomial Regression for Prediction of Confirmed Cases

In [58]:
from sklearn.preprocessing import PolynomialFeatures
In [59]:
poly = PolynomialFeatures(degree = 8) 
In [60]:
train_ml=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid_ml=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
model_scores=[]
In [62]:
train_poly=poly.fit_transform(np.array(train_ml.index).reshape(-1,1))
valid_poly=poly.fit_transform(np.array(valid_ml.index).reshape(-1,1))
y=train_ml["case"]
In [63]:
linreg=LinearRegression(normalize=True)
linreg.fit(train_poly,y)
Out[63]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)
In [64]:
prediction_poly=linreg.predict(valid_poly)
rmse_poly=np.sqrt(mean_squared_error(valid_ml["case"],prediction_poly))
model_scores.append(rmse_poly)
print("Root Mean Squared Error for Polynomial Regression: ",rmse_poly)
Root Mean Squared Error for Polynomial Regression:  11778.321488639067
In [66]:
comp_data=poly.fit_transform(np.array(covid19_tn_model.index).reshape(-1,1))
plt.figure(figsize=(11,6))
predictions_poly=linreg.predict(comp_data)

fig=go.Figure()
fig.add_trace(go.Scatter(x=covid19_tn_model["date"], y=covid19_tn_model["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=covid19_tn_model["date"], y=predictions_poly,
                    mode='lines',name="Polynomial Regression Best Fit",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Polynomial Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",
                 legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
03/03/202014/03/202020/03/202026/03/202001/04/202007/04/202013/04/202019/04/202025/04/202001/05/202007/05/202013/05/202019/05/202025/05/202031/05/202006/06/202012/06/202018/06/202024/06/202030/06/202006/07/202012/07/202018/07/202024/07/202030/07/202005/08/202011/08/202017/08/202023/08/202029/08/202004/09/202010/09/202016/09/202022/09/202028/09/202004/10/202010/10/202016/10/202022/10/202028/10/202003/11/202009/11/202015/11/202021/11/202027/11/2020020k40k60k80k
Train Data for Confirmed CasesPolynomial Regression Best FitConfirmed Cases Polynomial Regression PredictionDateConfirmed Cases
<Figure size 792x432 with 0 Axes>

i dont like this model

Support Vector Machine ModelRegressor for Prediction of Confirmed Cases¶

In [67]:
from sklearn.svm import SVR
In [68]:
train_ml=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid_ml=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
model_scores=[]
In [69]:
#Intializing SVR Model
svm=SVR(C=1,degree=6,kernel='poly',epsilon=0.01)
In [71]:
#Fitting model on the training data
svm.fit(np.array(train_ml.index).reshape(-1,1),np.array(train_ml["case"]).reshape(-1,1))
C:\Users\raed\Anaconda3\envs\ML\lib\site-packages\sklearn\utils\validation.py:760: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

Out[71]:
SVR(C=1, cache_size=200, coef0=0.0, degree=6, epsilon=0.01, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
In [121]:
prediction_valid_svm=svm.predict(np.array(valid_ml.index).reshape(-1,1))
In [74]:
model_scores.append(np.sqrt(mean_squared_error(valid_ml["case"],prediction_valid_svm)))
print("Root Mean Square Error for Support Vectore Machine: ",np.sqrt(mean_squared_error(valid_ml["case"],prediction_valid_svm)))
Root Mean Square Error for Support Vectore Machine:  7251.9351697814445
In [76]:
plt.figure(figsize=(11,6))
prediction_svm=svm.predict(np.array(covid19_tn_model.index).reshape(-1,1))
fig=go.Figure()
fig.add_trace(go.Scatter(x=covid19_tn_model["date"], y=covid19_tn_model["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=covid19_tn_model["date"], y=prediction_svm,
                    mode='lines',name="Support Vector Machine Best fit Kernel",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Support Vectore Machine Regressor Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
03/03/202014/03/202020/03/202026/03/202001/04/202007/04/202013/04/202019/04/202025/04/202001/05/202007/05/202013/05/202019/05/202025/05/202031/05/202006/06/202012/06/202018/06/202024/06/202030/06/202006/07/202012/07/202018/07/202024/07/202030/07/202005/08/202011/08/202017/08/202023/08/202029/08/202004/09/202010/09/202016/09/202022/09/202028/09/202004/10/202010/10/202016/10/202022/10/202028/10/202003/11/202009/11/202015/11/202021/11/202027/11/2020020k40k60k80k100k
Train Data for Confirmed CasesSupport Vector Machine Best fit KernelConfirmed Cases Support Vectore Machine Regressor PredictionDateConfirmed Cases
<Figure size 792x432 with 0 Axes>

as you can see SVM is the best model in our prediction

Time Series Forecasting

let's begin with Holt's Linear Model

In [77]:
from statsmodels.tsa.api import Holt,SimpleExpSmoothing,ExponentialSmoothing
In [78]:
train_ml=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
y_pred=valid.copy()
In [79]:
holt=Holt(np.asarray(train_ml["case"])).fit(smoothing_level=0.4, smoothing_slope=0.4,optimized=False)    
In [80]:
y_pred["Holt"]=holt.forecast(len(valid))
model_scores.append(np.sqrt(mean_squared_error(y_pred["case"],y_pred["Holt"])))
print("Root Mean Square Error Holt's Linear Model: ",np.sqrt(mean_squared_error(y_pred["case"],y_pred["Holt"])))
Root Mean Square Error Holt's Linear Model:  2014.1872133271897
In [83]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=covid19_tn_model.index, y=covid19_tn_model["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid.index, y=valid["case"],
                    mode='lines+markers',name="Validation Data for Confirmed Cases",))
fig.add_trace(go.Scatter(x=valid.index, y=y_pred["Holt"],
                    mode='lines+markers',name="Prediction of Confirmed Cases",))
fig.update_layout(title="Confirmed Cases Holt's Linear Model Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
050100150200250020k40k60k80k100k
Train Data for Confirmed CasesValidation Data for Confirmed CasesPrediction of Confirmed CasesConfirmed Cases Holt's Linear Model PredictionDateConfirmed Cases

AR Model (using AUTO ARIMA)

In [88]:
from pmdarima.arima import auto_arima
import pmdarima as pm
In [86]:
#pip install pmdarima   ----- try this 
In [89]:
model_train=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
y_pred=valid.copy()
In [90]:
model_ar= auto_arima(covid19_tn_model["case"],trace=True, error_action='ignore', start_p=0,start_q=0,max_p=4,max_q=0,
                   suppress_warnings=True,stepwise=False,seasonal=False)
model_ar.fit(covid19_tn_model["case"])
 ARIMA(0,2,0)(0,0,0)[0] intercept   : AIC=4311.409, Time=1.08 sec
 ARIMA(1,2,0)(0,0,0)[0] intercept   : AIC=4194.484, Time=0.06 sec
 ARIMA(2,2,0)(0,0,0)[0] intercept   : AIC=4109.242, Time=0.21 sec
 ARIMA(3,2,0)(0,0,0)[0] intercept   : AIC=4103.417, Time=0.10 sec
 ARIMA(4,2,0)(0,0,0)[0] intercept   : AIC=4095.277, Time=0.10 sec
Total fit time: 1.591 seconds
Out[90]:
ARIMA(maxiter=50, method='lbfgs', order=(4, 2, 0), out_of_sample_size=0,
      scoring='mse', scoring_args={}, seasonal_order=(0, 0, 0, 0),
      start_params=None, suppress_warnings=True, trend=None,
      with_intercept=True)
In [91]:
prediction_ar=model_ar.predict(len(valid))
y_pred["AR Model Prediction"]=prediction_ar
In [92]:
model_scores.append(np.sqrt(mean_squared_error(y_pred["case"],y_pred["AR Model Prediction"])))
print("Root Mean Square Error for AR Model: ",np.sqrt(mean_squared_error(y_pred["case"],y_pred["AR Model Prediction"])))
Root Mean Square Error for AR Model:  15084.032144596507
In [93]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=model_train.index, y=model_train["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid.index, y=valid["case"],
                    mode='lines+markers',name="Validation Data for Confirmed Cases",))
fig.add_trace(go.Scatter(x=valid.index, y=y_pred["AR Model Prediction"],
                    mode='lines+markers',name="Prediction of Confirmed Cases",))
fig.update_layout(title="Confirmed Cases AR Model Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
050100150200250020k40k60k80k100k
Train Data for Confirmed CasesValidation Data for Confirmed CasesPrediction of Confirmed CasesConfirmed Cases AR Model PredictionDateConfirmed Cases

not good

MA Model (using AUTO ARIMA)

In [95]:
model_train=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
y_pred=valid.copy()
In [96]:
model_ma= auto_arima(model_train["case"],trace=True, error_action='ignore', start_p=0,start_q=0,max_p=0,max_q=2,
                   suppress_warnings=True,stepwise=False,seasonal=False)
model_ma.fit(model_train["case"])
 ARIMA(0,2,0)(0,0,0)[0] intercept   : AIC=4090.775, Time=0.02 sec
 ARIMA(0,2,1)(0,0,0)[0] intercept   : AIC=3874.683, Time=0.26 sec
 ARIMA(0,2,2)(0,0,0)[0] intercept   : AIC=3842.893, Time=0.31 sec
Total fit time: 0.604 seconds
Out[96]:
ARIMA(maxiter=50, method='lbfgs', order=(0, 2, 2), out_of_sample_size=0,
      scoring='mse', scoring_args={}, seasonal_order=(0, 0, 0, 0),
      start_params=None, suppress_warnings=True, trend=None,
      with_intercept=True)
In [97]:
prediction_ma=model_ma.predict(len(valid))
y_pred["MA Model Prediction"]=prediction_ma
In [98]:
model_scores.append(np.sqrt(mean_squared_error(valid["case"],prediction_ma)))
print("Root Mean Square Error for MA Model: ",np.sqrt(mean_squared_error(valid["case"],prediction_ma)))
Root Mean Square Error for MA Model:  2217.486447676189
In [99]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=model_train.index, y=model_train["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid.index, y=valid["case"],
                    mode='lines+markers',name="Validation Data for Confirmed Cases",))
fig.add_trace(go.Scatter(x=valid.index, y=y_pred["MA Model Prediction"],
                    mode='lines+markers',name="Prediction for Confirmed Cases",))
fig.update_layout(title="Confirmed Cases MA Model Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
050100150200250020k40k60k80k100k
Train Data for Confirmed CasesValidation Data for Confirmed CasesPrediction for Confirmed CasesConfirmed Cases MA Model PredictionDateConfirmed Cases

goood model

ARIMA Model (using AUTOARIMA)

In [100]:
model_train=covid19_tn_model.iloc[:int(covid19_tn_model.shape[0]*0.95)]
valid=covid19_tn_model.iloc[int(covid19_tn_model.shape[0]*0.95):]
y_pred=valid.copy()
In [101]:
model_arima= auto_arima(model_train["case"],trace=True, error_action='ignore', start_p=1,start_q=1,max_p=3,max_q=3,
                   suppress_warnings=True,stepwise=False,seasonal=False)
model_arima.fit(model_train["case"])
 ARIMA(0,2,0)(0,0,0)[0] intercept   : AIC=4090.775, Time=0.03 sec
 ARIMA(0,2,1)(0,0,0)[0] intercept   : AIC=3874.683, Time=0.21 sec
 ARIMA(0,2,2)(0,0,0)[0] intercept   : AIC=3842.893, Time=0.26 sec
 ARIMA(0,2,3)(0,0,0)[0] intercept   : AIC=3822.394, Time=0.52 sec
 ARIMA(1,2,0)(0,0,0)[0] intercept   : AIC=3977.330, Time=0.04 sec
 ARIMA(1,2,1)(0,0,0)[0] intercept   : AIC=3849.577, Time=0.41 sec
 ARIMA(1,2,2)(0,0,0)[0] intercept   : AIC=3810.224, Time=0.53 sec
 ARIMA(1,2,3)(0,0,0)[0] intercept   : AIC=3821.494, Time=0.41 sec
 ARIMA(2,2,0)(0,0,0)[0] intercept   : AIC=3892.401, Time=0.06 sec
 ARIMA(2,2,1)(0,0,0)[0] intercept   : AIC=3843.314, Time=0.37 sec
 ARIMA(2,2,2)(0,0,0)[0] intercept   : AIC=3819.415, Time=0.40 sec
 ARIMA(2,2,3)(0,0,0)[0] intercept   : AIC=3820.987, Time=0.53 sec
 ARIMA(3,2,0)(0,0,0)[0] intercept   : AIC=3886.762, Time=0.07 sec
 ARIMA(3,2,1)(0,0,0)[0] intercept   : AIC=3842.477, Time=0.52 sec
 ARIMA(3,2,2)(0,0,0)[0] intercept   : AIC=3821.019, Time=0.88 sec
Total fit time: 5.263 seconds
Out[101]:
ARIMA(maxiter=50, method='lbfgs', order=(1, 2, 2), out_of_sample_size=0,
      scoring='mse', scoring_args={}, seasonal_order=(0, 0, 0, 0),
      start_params=None, suppress_warnings=True, trend=None,
      with_intercept=True)
In [102]:
prediction_arima=model_arima.predict(len(valid))
y_pred["ARIMA Model Prediction"]=prediction_arima
In [103]:
model_scores.append(np.sqrt(mean_squared_error(valid["case"],prediction_arima)))
print("Root Mean Square Error for ARIMA Model: ",np.sqrt(mean_squared_error(valid["case"],prediction_arima)))
Root Mean Square Error for ARIMA Model:  1133.617304492231
In [104]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=model_train.index, y=model_train["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid.index, y=valid["case"],
                    mode='lines+markers',name="Validation Data for Confirmed Cases",))
fig.add_trace(go.Scatter(x=valid.index, y=y_pred["ARIMA Model Prediction"],
                    mode='lines+markers',name="Prediction for Confirmed Cases",))
fig.update_layout(title="Confirmed Cases ARIMA Model Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
050100150200250020k40k60k80k100k
Train Data for Confirmed CasesValidation Data for Confirmed CasesPrediction for Confirmed CasesConfirmed Cases ARIMA Model PredictionDateConfirmed Cases

SARIMA Model (using AUTO ARIMA)

In [106]:
model_sarima= auto_arima(model_train["case"],trace=True, error_action='ignore', 
                         start_p=0,start_q=0,max_p=2,max_q=2,m=7,
                   suppress_warnings=True,stepwise=True,seasonal=True)
model_sarima.fit(model_train["case"])
Performing stepwise search to minimize aic
 ARIMA(0,2,0)(1,0,1)[7]             : AIC=4070.373, Time=0.20 sec
 ARIMA(0,2,0)(0,0,0)[7]             : AIC=4088.796, Time=0.04 sec
 ARIMA(1,2,0)(1,0,0)[7]             : AIC=3968.481, Time=0.13 sec
 ARIMA(0,2,1)(0,0,1)[7]             : AIC=3859.356, Time=0.34 sec
 ARIMA(0,2,1)(0,0,0)[7]             : AIC=3876.171, Time=0.09 sec
 ARIMA(0,2,1)(1,0,1)[7]             : AIC=3861.200, Time=0.46 sec
 ARIMA(0,2,1)(0,0,2)[7]             : AIC=3861.317, Time=0.87 sec
 ARIMA(0,2,1)(1,0,0)[7]             : AIC=3860.242, Time=0.29 sec
 ARIMA(0,2,1)(1,0,2)[7]             : AIC=inf, Time=1.13 sec
 ARIMA(0,2,0)(0,0,1)[7]             : AIC=4067.259, Time=0.13 sec
 ARIMA(1,2,1)(0,0,1)[7]             : AIC=3841.472, Time=0.24 sec
 ARIMA(1,2,1)(0,0,0)[7]             : AIC=3851.237, Time=0.12 sec
 ARIMA(1,2,1)(1,0,1)[7]             : AIC=3840.714, Time=0.90 sec
 ARIMA(1,2,1)(1,0,0)[7]             : AIC=3841.931, Time=0.19 sec
 ARIMA(1,2,1)(2,0,1)[7]             : AIC=inf, Time=1.46 sec
 ARIMA(1,2,1)(1,0,2)[7]             : AIC=inf, Time=1.46 sec
 ARIMA(1,2,1)(0,0,2)[7]             : AIC=3843.307, Time=0.61 sec
 ARIMA(1,2,1)(2,0,0)[7]             : AIC=3843.904, Time=0.40 sec
 ARIMA(1,2,1)(2,0,2)[7]             : AIC=3838.119, Time=1.23 sec
 ARIMA(0,2,1)(2,0,2)[7]             : AIC=3860.688, Time=1.40 sec
 ARIMA(1,2,0)(2,0,2)[7]             : AIC=3968.780, Time=0.84 sec
 ARIMA(2,2,1)(2,0,2)[7]             : AIC=3830.364, Time=1.58 sec
 ARIMA(2,2,1)(1,0,2)[7]             : AIC=inf, Time=1.46 sec
 ARIMA(2,2,1)(2,0,1)[7]             : AIC=inf, Time=1.22 sec
 ARIMA(2,2,1)(1,0,1)[7]             : AIC=3832.477, Time=0.81 sec
 ARIMA(2,2,0)(2,0,2)[7]             : AIC=inf, Time=1.08 sec
 ARIMA(2,2,2)(2,0,2)[7]             : AIC=inf, Time=1.88 sec
 ARIMA(1,2,2)(2,0,2)[7]             : AIC=inf, Time=1.66 sec
 ARIMA(2,2,1)(2,0,2)[7] intercept   : AIC=3835.359, Time=1.53 sec

Best model:  ARIMA(2,2,1)(2,0,2)[7]          
Total fit time: 24.021 seconds
Out[106]:
ARIMA(maxiter=50, method='lbfgs', order=(2, 2, 1), out_of_sample_size=0,
      scoring='mse', scoring_args={}, seasonal_order=(2, 0, 2, 7),
      start_params=None, suppress_warnings=True, trend=None,
      with_intercept=False)
In [107]:
prediction_sarima=model_sarima.predict(len(valid))
y_pred["SARIMA Model Prediction"]=prediction_sarima
In [108]:
model_scores.append(np.sqrt(mean_squared_error(y_pred["case"],y_pred["SARIMA Model Prediction"])))
print("Root Mean Square Error for SARIMA Model: ",np.sqrt(mean_squared_error(y_pred["case"],y_pred["SARIMA Model Prediction"])))
Root Mean Square Error for SARIMA Model:  2457.0755041337648
In [109]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=model_train.index, y=model_train["case"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid.index, y=valid["case"],
                    mode='lines+markers',name="Validation Data for Confirmed Cases",))
fig.add_trace(go.Scatter(x=valid.index, y=y_pred["SARIMA Model Prediction"],
                    mode='lines+markers',name="Prediction for Confirmed Cases",))
fig.update_layout(title="Confirmed Cases SARIMA Model Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()
050100150200250020k40k60k80k100k
Train Data for Confirmed CasesValidation Data for Confirmed CasesPrediction for Confirmed CasesConfirmed Cases SARIMA Model PredictionDateConfirmed Cases

feedback information

SVM AND ARIMA ARE THE BEST MODELS

In [ ]: